## About
# Here I analyze the profilometer data.
# I calculate the mean thickness and the bottom thickness.
# I only include specific intervals in the calculations,
# because those are the intervals that were leveled by the Vision 64 software.
# Since the substrates are not flat and leveling works with linear functions, not
# all of the samples could be leveled at once.

## !! Warning: only works properly when there are no other csv files in the folder (like the statistics output)

## Clean environment
# Deliberately no rm(list = ls()) here: it would silently delete every object
# in the global environment of whoever sources this script, and it still does
# not give a clean state (attached packages and options survive). Every
# variable used below is assigned before it is read, so nothing depends on a
# wiped workspace. For a truly clean run, restart the R session instead.

## Init
library(ggplot2)
library(latex2exp) # To use latex in plots
library(stringr) # package for regular expressions
#library(pracma) # to find local minima and maxima (alternative: findPeaks or findValleys in quantmod package)
library(quantmod) # findValleys/findPeaks

# Colour palette used for the horizontal reference lines in the plots
# (entries 2..8 are handed to scale_color_manual further down).
cbPalette <- c(
  "#999999",
  "#E69F00",
  "#56B4E9",
  "#009E73",
  "#F0E442",
  "#0072B2",
  "#D55E00",
  "#CC79A7"
)

## Read data
# Collect the full paths of all profilometer exports in the data folder
# (non-recursive, so sub-folders such as the plot/statistics output are not
# scanned).
# `pattern` is a regular expression, not a shell glob: "\\.csv$" matches only
# names that really end in ".csv" (the former "*.csv" also matched names that
# merely contain "csv" after some character, e.g. "foo.csv.bak").
files <- list.files(
  path = "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH_PU_VTT_2021-01-15",
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = FALSE
)
resolution <- 0.833278 # scan resolution in um

## Pre-allocate the result containers
# Every statistic gets its own list with one (initially NULL) slot per input
# file; the slots are filled inside the per-file loop.

# Sample name and whole-window thickness statistics
sampleFull_lst <- thickness_lst <- std_lst <- sem_lst <- n_lst <-
  vector("list", length(files))

# Baseline (valley) statistics
baseline_lst <- baseline_sd_lst <- baseline_sem_lst <- nbaseline_lst <-
  vector("list", length(files))

# Peak statistics
peaks_mean_lst <- peaks_sd_lst <- peaks_sem_lst <- npeaks_lst <-
  vector("list", length(files))

## Calculations
library(stringr) # package for regular expressions
#library(pracma) # to find local minima and maxima (alternative: findPeaks or findValleys in quantmod package)
library(quantmod) # findValleys/findPeaks
# Directory that receives every profile plot. The trailing slash is required
# because file names are appended with paste0().
profile_dir <- "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH_PU_VTT_2021-01-15/Profiles/"

# Render one ggplot object into a PDF inside `profile_dir`.
#   plot      ggplot object to draw
#   file_name name of the PDF (appended to `profile_dir`)
#   width, height  page size in inches (default: 16:9 page, 7 in high)
# The plot is print()ed explicitly because auto-printing is off inside loops
# and functions; on.exit() closes the device even if drawing fails, so a
# broken plot cannot leave a PDF device open for the following files.
save_profile_pdf <- function(plot, file_name, width = 7 * 16 / 9, height = 7) {
  pdf(paste0(profile_dir, file_name), width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(plot)
}

# Build a line plot of a profile with horizontal reference lines on top.
#   profile  data frame with columns `x` and `Height`
#   hlines   data frame with columns `yintercept` and `Lines` (legend label)
#   y_max    upper limit of the y axis (lower limit is 0)
#   x_max    optional upper limit of the x axis (lower limit is 0);
#            NULL leaves the x axis unrestricted
profile_with_hlines <- function(profile, hlines, y_max, x_max = NULL) {
  p <- ggplot(data = profile, aes(x = x, y = Height)) +
    geom_line(size = 2) +
    geom_hline(aes(yintercept = yintercept, color = Lines, linetype = Lines),
               data = hlines, size = 1.5) +
    ylim(0, y_max)
  if (!is.null(x_max)) {
    p <- p + xlim(0, x_max)
  }
  p +
    xlab(TeX("x ($cm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    scale_color_manual(values = cbPalette[2:length(cbPalette)]) +
    labs(title = "") +
    theme_bw(base_size = 24)
}

# seq_along() instead of 1:length(): with no input files the loop must not run
# at all (1:0 would iterate over c(1, 0) and fail on files[1] being NA).
for (i in seq_along(files)) {
  if (str_detect(files[i], "stats")) { # ignore file if it is a statistics file
    next
  }

  ## Extract sample name
  # File name (including the ".csv" suffix) taken from the end of the path.
  sampleName <- str_extract(files[i], "[a-zA-Z0-9()\\-\\_]*\\.csv$")

  ## Extract leveled sample area
  # The path is expected to contain "<start>-<end>mm"; only the start value is
  # used below (for the zoomed plot). x1 is NA when the pattern is absent.
  leveled_range <- str_extract(files[i], "[0-9]+\\-[0-9]+mm")
  x1 <- as.numeric(str_extract(leveled_range, "^[0-9]+"))

  ## Read file
  profil <- read.csv(file = files[i], header = TRUE, sep = ",", dec = ".", skip = 22)

  ## Rescale the first two columns
  # Both columns are scaled by 1e-4. Judging from the axis labels used below
  # this yields x in cm and Height in um -- NOTE(review): confirm against the
  # units of the instrument export.
  # Analysis window: the rows from 5000 um to 13000 um along the scan,
  # converted to row indices via the scan resolution. Rows beyond the end of a
  # shorter scan come back as NA and are ignored by the statistics below.
  window_rows <- round(5000 / resolution):round(13000 / resolution)
  profi_data_um <- profil[window_rows, 1:2] * 0.0001
  colnames(profi_data_um) <- c("x", "Height")
  profi_data_um$x_um <- profi_data_um$x * 1e4 # x rescaled by 1e4 for the zoomed plot
  profi_data_um_full <- profil[, 1:2] * 0.0001 # full profile
  colnames(profi_data_um_full) <- c("x", "Height")

  ## Mean thickness over the analysis window
  thickness_mean <- mean(profi_data_um$Height, na.rm = TRUE)
  thickness_std <- sd(profi_data_um$Height, na.rm = TRUE)
  data_cleaned <- na.omit(profi_data_um$Height)
  thickness_sem <- thickness_std / sqrt(length(data_cleaned))

  ## Peaks
  # quantmod::findPeaks() reports the index of the first sample AFTER each
  # local maximum, so shift by -1 to read the height of the peak itself.
  peaks_x <- findPeaks(profi_data_um$Height, thresh = 0) - 1
  peaks <- profi_data_um$Height[peaks_x]
  peaks2 <- peaks[peaks > 10] # 10 um as an arbitrary threshold for aggregates/unexfoliated flakes
  peaks_mean_lst[[i]] <- mean(peaks2)
  peaks_sd_lst[[i]] <- sd(peaks2)
  peaks_sem_lst[[i]] <- sd(peaks2) / sqrt(length(peaks2))

  ## Baseline (valleys)
  # Same index convention as findPeaks(): shift by -1 to land on the minimum.
  lows_x <- findValleys(profi_data_um$Height, thresh = 0) - 1
  lows <- profi_data_um$Height[lows_x]
  # Upper bound for valleys is the mean peak height (excludes shoulder peaks).
  # Without any qualifying peak that mean is NaN, which would turn the filter
  # into NAs; fall back to "no upper bound" in that case.
  lows_upper <- if (length(peaks2) > 0) mean(peaks2) else Inf
  lows2 <- lows[lows > 0 & lows <= lows_upper] # also exclude negative data (likely corresponding to pinholes)
  baseline_lst[[i]] <- mean(lows2)
  baseline_sd_lst[[i]] <- sd(lows2)
  baseline_sem_lst[[i]] <- sd(lows2) / sqrt(length(lows2))

  ## Store the remaining per-file results
  sampleFull_lst[[i]] <- sampleName
  thickness_lst[[i]] <- thickness_mean
  std_lst[[i]] <- thickness_std
  sem_lst[[i]] <- thickness_sem
  n_lst[[i]] <- length(data_cleaned)
  npeaks_lst[[i]] <- length(peaks2)
  nbaseline_lst[[i]] <- length(lows2)

  ## Plots of profile
  # Area of interest
  profil_plot <- ggplot(data = profi_data_um, aes(x = x, y = Height)) +
    geom_line(size = 2) +
    ylim(0, 60) +
    xlab(TeX("x ($cm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    theme_bw(base_size = 24)
  save_profile_pdf(profil_plot, paste0(sampleName, ".pdf"))

  # Area of interest + baseline
  baseline.data <- data.frame(yintercept = c(baseline_lst[[i]]), Lines = c("Baseline"))
  profil_plot <- profile_with_hlines(profi_data_um, baseline.data, y_max = 60)
  save_profile_pdf(profil_plot, paste0(sampleName, "_baseline.pdf"))

  # Area of interest + baseline/mean/peaks
  line.data <- data.frame(
    yintercept = c(baseline_lst[[i]], thickness_lst[[i]], peaks_mean_lst[[i]]),
    Lines = c("Baseline", "Mean", "Peaks mean")
  )
  profil_plot <- profile_with_hlines(profi_data_um, line.data, y_max = 60)
  save_profile_pdf(profil_plot, paste0(sampleName, "_hlines.pdf"))

  # Full profile + lines
  profil_plot <- profile_with_hlines(profi_data_um_full, line.data, y_max = 60, x_max = 1.5)
  save_profile_pdf(profil_plot, paste0(sampleName, "_full_hlines.pdf"))

  # Full profile + lines (y scale corresponding to CCD series)
  profil_plot <- profile_with_hlines(profi_data_um_full, line.data, y_max = 100, x_max = 1.5)
  save_profile_pdf(profil_plot, paste0(sampleName, "_full_hlines_y100.pdf"))

  # Full profile, free axes
  profil_plot <- ggplot(data = profi_data_um_full, aes(x = x, y = Height)) +
    geom_line(size = 2) +
    xlab(TeX("x ($cm$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    theme_bw(base_size = 24)
  save_profile_pdf(profil_plot, paste0(sampleName, "_full.pdf"))

  # Zoom: individual data points in a 30 um wide window that starts at the
  # beginning of the leveled range (x1 is in mm, hence * 1000), y from 0 to 60.
  profil_plot <- ggplot(data = profi_data_um, aes(x = x_um, y = Height)) +
    geom_point(size = 2) +
    xlim(x1 * 1000, x1 * 1000 + 30) +
    ylim(0, 60) +
    xlab(TeX("x ($\\mu m$)")) +
    ylab(TeX("Height ($\\mu m$)")) +
    theme_bw(base_size = 24)
  save_profile_pdf(profil_plot, paste0(sampleName, "_um.pdf"), width = 7, height = 7)
}

## Assemble the per-file results into one table and export it
# Output column name -> list that holds the values for that column.
# unlist() flattens each list to a plain vector and drops the NULL slots of
# files that were skipped in the loop, so all columns keep the same length.
stat_sources <- list(
  Sample                 = sampleFull_lst,
  Thickness_mean         = thickness_lst,
  Thickness_sd           = std_lst,
  Thickness_sem          = sem_lst,
  Number_points          = n_lst,
  Peaks_mean             = peaks_mean_lst,
  Peaks_sd               = peaks_sd_lst,
  Peaks_sem              = peaks_sem_lst,
  Number_peak_points     = npeaks_lst,
  Baseline_mean          = baseline_lst,
  Baseline_sd            = baseline_sd_lst,
  Baseline_sem           = baseline_sem_lst,
  Number_baseline_points = nbaseline_lst
)
df <- do.call(data.frame, unname(lapply(stat_sources, unlist)))
names(df) <- names(stat_sources)
# print(df)

## Write to csv
# Semicolon-separated with decimal commas.
write.table(
  df,
  "//st-a2355/tomo5/Hazendonk_Laura/1_StretchableGink/Data/Profilometer/LH_PU_VTT_2021-01-15/Stats/LH_PU_VTT_2021-01-15_stats_20210208.csv",
  dec = ",",
  sep = ";",
  col.names = TRUE,
  row.names = FALSE
)
